#!/bin/bash
# SFT training launcher for VERL's FSDP trainer (verl.trainer.fsdp_sft_trainer).
# Edit MODEL_NAME and the path constants below before running.
set -euo pipefail
set -x

# Model configuration
# (alternative previously tried: Qwen3-4B-Instruct-2507)
MODEL_NAME="Qwen2.5-0.5B-Instruct"
# Lowercase, dash->underscore variant used for checkpoint dirs / experiment names.
MODEL_NAME_LOWER=$(echo "$MODEL_NAME" | tr '[:upper:]' '[:lower:]' | tr '-' '_')

# Fixed configuration parameters
NPROC_PER_NODE=2  # use 2 GPUs
TRAIN_PATH="/home/yangcx24/Jayx/RAGEN/dataset/merged/train.parquet"
TEST_PATH="/home/yangcx24/Jayx/RAGEN/dataset/merged/test.parquet"
SAVE_PATH="/home/yangcx24/Jayx/RAGEN/checkpoints/${MODEL_NAME_LOWER}_sft"
MODEL_PATH="/home/yangcx24/Jayx/Models/${MODEL_NAME}"

# Restrict training to GPUs 2 and 3.
export CUDA_VISIBLE_DEVICES=2,3

# Run VERL SFT training.
# NOTE(review): Hydra overrides are last-wins. The original command listed
# model.fsdp_config.cpu_offload twice and data.truncation twice
# (error, then right); duplicates removed, keeping the effective values.
# Bracketed values are quoted so the shell cannot glob-expand them.
torchrun --standalone --nnodes=1 --nproc_per_node="$NPROC_PER_NODE" \
    -m verl.trainer.fsdp_sft_trainer \
    data.train_batch_size=2 \
    data.micro_batch_size_per_gpu=1 \
    data.train_files="$TRAIN_PATH" \
    data.val_files="$TEST_PATH" \
    data.multiturn.enable=true \
    data.multiturn.messages_key=messages \
    data.max_length=14000 \
    data.truncation=right \
    data.balance_dp_token=False \
    data.chat_template=null \
    model.partial_pretrain="$MODEL_PATH" \
    model.fsdp_config.wrap_policy.min_num_params=0 \
    model.fsdp_config.cpu_offload=False \
    model.fsdp_config.offload_params=False \
    model.enable_gradient_checkpointing=False \
    model.trust_remote_code=False \
    model.lora_rank=0 \
    model.use_liger=False \
    optim.lr=1e-5 \
    'optim.betas=[0.9,0.95]' \
    optim.weight_decay=0.01 \
    optim.warmup_steps_ratio=0.1 \
    optim.clip_grad=1.0 \
    optim.lr_scheduler=cosine \
    ulysses_sequence_parallel_size=2 \
    use_remove_padding=true \
    trainer.default_local_dir="$SAVE_PATH" \
    trainer.default_hdfs_dir=null \
    trainer.resume_path="$SAVE_PATH" \
    trainer.project_name=SFT \
    trainer.experiment_name="${MODEL_NAME_LOWER}-sft" \
    trainer.total_epochs=20 \
    trainer.total_training_steps=null \
    "trainer.logger=['swanlab']" \
    trainer.seed=1
